
library(dplyr)
library(estimatr)
library(ggplot2)
library(tidyr)
library(mediation)
library(ri2)
library(huxtable)
options(java.parameters = "-Xmx6g")
library(here)

i_am("3_results.R")

# load and recode data ####

# move data file to same folder as code, or set file path here
dat <- read.csv(here("Data", "combined_dataset.csv"))

dat <- dat %>% 
  mutate(matched = case_when(is.na(posterior)~0,
                             T~1))

dat <- dat %>%
  mutate(sex = case_when(x_f_ch_male==1|x_f_ad_male==1~"M",
                         x_f_ch_male==0|x_f_ad_male==0~"F",
                         is.na(x_f_ch_male)&is.na(x_f_ad_male)&f_svy_gender=="M"~"M",
                         is.na(x_f_ch_male)&is.na(x_f_ad_male)&f_svy_gender=="F"~"F"))

dat <- dat %>%
  mutate(age_group = case_when((ra_year - f_svy_yob_imp_ytgc)>=13~"old_kid",
                               (ra_year - f_svy_yob_imp_ytgc)<13~"young_kid",
                               f_svy_sample2007 %in% c("AD", "ES")~"adult"))

#remove anyone without randomization group
dat <- dat %>% filter(!is.na(ra_group)&!is.na(age_group))

#treat incorrect matches as non-matches
#if someone voted before age 18, set all to 0
dat$badmatch <- ifelse(!is.na(dat$r_pretreatturnout)&dat$age_group!="adult", 1, 0)

dat$matched <- ifelse(dat$badmatch==1, 0, dat$matched)

# set turnout to 0 if someone didn't match to the voter file
dat <- dat %>% 
  mutate(r_postturnout = case_when(matched==0~0,
                                   T~r_postturnout)) %>%
  mutate(r_pretreatturnout = case_when(matched==0~0,
                                       T~r_pretreatturnout))
dat$r_postturnout <- ifelse(is.na(dat$r_postturnout), 0, dat$r_postturnout)

# create ever voted indicator
dat <- dat %>%
  mutate(evervoted_post = case_when(r_postturnout>0~1,
                                    T~0)) %>%
  mutate(evervoted_pre = case_when(r_pretreatturnout>0~1,
                                   T~0))
dat <- dat %>%
  mutate(r_postturnout = case_when(badmatch==1~0,
                                   T~r_postturnout)) %>%
  mutate(r_pretreatturnout = case_when(badmatch==1~0,
                                       T~r_pretreatturnout)) %>%
  mutate(r_postregturnout = case_when(badmatch==1~NA_real_,
                                      T~r_postregturnout)) %>%
  mutate(evervoted_post = case_when(badmatch==1~0,
                                    T~evervoted_post)) %>%
  mutate(evervoted_pre = case_when(badmatch==1~0,
                                   T~evervoted_pre)) 


#covariates for models with controls
covs_ad <- c("x_f_ad_36_40", "x_f_ad_41_45", "x_f_ad_46_50",
             "x_f_ad_edged", "x_f_ad_edgradhs", "x_f_ad_edgradhs_miss", "x_f_ad_edinsch",
             "x_f_ad_ethn_hisp", "x_f_ad_le_35", "x_f_ad_male",
             "x_f_ad_nevmarr", "x_f_ad_parentu18", "x_f_ad_race_black", "x_f_ad_race_other", "x_f_ad_working",
             "x_f_hh_afdc", "x_f_hh_car", "x_f_hh_disabl", "x_f_hh_noteens",
             "x_f_hh_size2", "x_f_hh_size3", "x_f_hh_size4", "x_f_hh_victim",
             "x_f_hood_5y", "x_f_hood_chat", "x_f_hood_nbrkid", "x_f_hood_nofamily",
             "x_f_hood_nofriend", "x_f_hood_unsafenit", "x_f_hood_verydissat",
             "x_f_hous_fndapt", "x_f_hous_mov3tm", "x_f_hous_movdrgs", "x_f_hous_movschl", "x_f_hous_sec8bef",
             "x_f_site_balt", "x_f_site_bos", "x_f_site_chi", "x_f_site_la")

# code racial categories
dat <- dat %>%
  mutate(race = case_when(x_f_ad_race_black==1~"black",
                          x_f_ad_ethn_hisp==1~"hisp",
                          x_f_ad_race_other==1&x_f_ad_ethn_hisp==0~"other",
                          T~"white"))
# variable for poverty of post-treatment neighborhoods
dat <- dat %>% rename("local_poverty_posttreat"=f_c9010t_perpov_dw)

# code adult education
dat <- dat %>%
  mutate(ad_educ = case_when(hed2==4~"no hs",
                             (hed2 %in% c(1,2))&hed3==5~"hs",
                             (hed4 %in% c(1,2))~"aa",
                             (hed4 %in% c(3,4))~"ba+" ))
dat$ad_educ <- factor(dat$ad_educ, 
                      levels=c("no hs", "hs", "aa", "ba+"),
                      ordered=T)
# code youth education
dat <- dat %>%
  mutate(yt_educ = case_when(yed3c==5&yed1==5~"no hs",
                             yed3c==1&(yed3a<=12)~"hs",
                             yed3a>12~"more than hs",
                             hho3 %in% c(1,2)~"hs",
                             hho3==3~"no hs",
                             hho4==1~"more than hs"))
dat$yt_educ <- factor(dat$yt_educ, 
                      levels=c("no hs", "hs", "more than hs"),
                      ordered=T)
# code family number of moves at interim
dat <- dat %>%
  group_by(mto_pseudo_famid) %>%
  mutate(fam_moves = mean(a22, na.rm=T))

# Figure 1: initial effects ####

dat <- dat %>%
  mutate(tract_unemp = 1-f_c9010t_pemp_dw) %>%
  mutate(tract_welf = f_c9010t_pwelf_dw) %>%
  mutate(tract_femalehead = f_c9010t_pfsfem_dw) %>%
  mutate(tract_minority = f_c9010t_pminorty_dw) %>%
  mutate(tract_poverty = local_poverty_posttreat) %>%
  mutate(tract_college = f_c9010t_pcolldeg_dw)

a <- dat %>%
  group_by(age_group, ra_group_factor) %>%
  summarize(across(tract_unemp:tract_college, ~weighted.mean(w=f_wt_totcore98, x=., na.rm=T))) %>%
  pivot_longer(cols=c(tract_unemp:tract_college)) %>%
  filter(age_group!="adult") %>%
  mutate(ra_group_factor = factor(ra_group_factor, levels=c("control", "section 8", "experimental"), ordered=TRUE)) %>%
  mutate(ra_group_factor = recode(ra_group_factor, "control"="Control",
                                  "section 8" = "Section 8", "experimental"="Experimental")) %>%
  mutate(name = factor(name, levels=c("tract_poverty", "tract_unemp", "tract_welf", "tract_femalehead", "tract_minority", "tract_college"))) %>%
  mutate(name = recode(name, "tract_college" = "Tract pct. college grad",
                       "tract_femalehead" = "Tract pct. female-headed",
                       "tract_minority" = "Tract pct. minority race",
                       "tract_poverty" = "Tract poverty rate",
                       "tract_unemp" = "Tract unemployment rate",
                       "tract_welf" = "Tract pct. on welfare")) %>%
  mutate(age_group = recode(age_group, "old_kid"="Teens", "young_kid"="Children")) %>%
  ggplot() +
  geom_bar(aes(x=age_group, y=value, group=ra_group_factor, fill=ra_group_factor), stat="identity", position="dodge") + 
  facet_wrap(~name, nrow = 1) + 
  theme_bw() +
  scale_fill_manual(name="Treatment", values=c("black", "gray40", "gray80")) +
  xlab("Age Group") + ylab("Duration-weighted Percentage") + 
  theme(text=element_text(size=14))
pdf(file="Figures/figure1.pdf", width=14, height=5)
a
dev.off()

dat$age_final <- 2008 - dat$f_svy_yob_imp_ytgc
dat <- dat %>%
  mutate(grad_hs = case_when(age_final<19~NA_real_,
                             yed3c==1|hho3==2~1,
                             yed3c==5|hho3 %in% c(1,3)~0)) %>%
  mutate(attend_coll = case_when(age_final<19~NA_real_,
                                 yed3e %in% c(1,2)|hho4==1~1,
                                 yed3e %in% c(3:99)|hho4==5~0)) %>%
  mutate(work = case_when(age_final<23~NA_real_,
                          yem1==1|hho5 %in% c(1,2)~1,
                          yem1>1|hho5>2~0)) %>%
  mutate(parent = case_when(yhl1d %in% c(1,2)|hho7>0~1,
                            yhl1d==5|hho7==0~0)) %>%
  mutate(jail = case_when(hho10a==1~1,
                          hho10a==5~0))

a <- dat %>%
  group_by(age_group, ra_group_factor) %>%
  summarize(across(c(grad_hs, attend_coll, work, parent, jail), ~weighted.mean(w=f_wt_totcore98, x=., na.rm=T))) %>%
  pivot_longer(cols=c(grad_hs:jail)) %>%
  filter(age_group!="adult") %>%
  mutate(age_group = recode(age_group, "old_kid"="Teens", "young_kid"="Children")) %>%
  mutate(ra_group_factor = factor(ra_group_factor, levels=c("control", "section 8", "experimental"), ordered=TRUE)) %>%
  mutate(ra_group_factor = recode(ra_group_factor, "control"="Control",
                                  "section 8" = "Section 8", "experimental"="Experimental")) %>%
  mutate(name = factor(name, levels=c("grad_hs", "attend_coll", "work", "parent", "jail"), ordered=TRUE)) %>%
  mutate(name = recode(name, "grad_hs"="Graduated HS", "attend_coll"="Attended College",
                       "work" = "Working", "parent"="Parent", "jail"="Been to Jail/Prison")) %>%
  ggplot() +
  geom_bar(aes(x=age_group, y=value, group=ra_group_factor, fill=ra_group_factor), stat="identity", position="dodge") + 
  facet_wrap(~name, nrow = 1) + 
  theme_bw() +
  scale_fill_manual(name="Treatment", values=c("black", "gray40", "gray80")) +
  xlab("Age Group") + ylab("Mean") + 
  theme(text=element_text(size=14))
pdf(file="Figures/figure1b.pdf", width=14, height=5)
a
dev.off()


# Appendix A0: Means and significance for intermediate outcomes ####
mns <- dat %>%
  group_by(age_group, ra_group_factor) %>%
  summarize(across(c(tract_unemp:tract_college, grad_hs, attend_coll, work, parent, jail), ~weighted.mean(w=f_wt_totcore98, x=., na.rm=T))) %>%
  pivot_longer(cols=c(tract_unemp:tract_college,grad_hs:jail)) %>%
  filter(age_group!="adult") %>%
  pivot_wider(id_cols=c(age_group, name), names_from=ra_group_factor, values_from=value)

mns$p.exp <- NA
mns$p.s8 <- NA
for(i in 1:length(unique(mns$age_group))){
  for(k in 1:length(unique(mns$name))){
    f1 <- as.formula(paste0(unique(mns$name)[k], "~ra_group_factor+factor(ra_site)"))
    mod1 <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group==unique(mns$age_group)[i],], clusters = mto_pseudo_famid)
    mns$p.exp[mns$age_group==unique(mns$age_group)[i]&mns$name==unique(mns$name)[k]] <- mod1$p.value["ra_group_factorexperimental"]
    mns$p.s8[mns$age_group==unique(mns$age_group)[i]&mns$name==unique(mns$name)[k]] <- mod1$p.value["ra_group_factorsection 8"]
  }
}

mns %>% 
  mutate(across(.cols=c(control:p.s8), function(x) round(x, 2))) %>%
  View()

#
# Appendix A1: Summary statistics: match rate analyses ####

dat %>%
  group_by(age_group) %>%
  summarize(matched = mean(matched, na.rm=T))

dat %>%
  group_by(ra_group_factor) %>%
  summarize(matched = mean(matched, na.rm=T))


dat %>%
  group_by(age_group, ra_group_factor) %>%
  summarize(matched = mean(matched, na.rm=T))

dat %>%
  group_by(ra_group_factor) %>%
  summarize(turnout = mean(r_pretreatturnout, na.rm=T))


dat %>% 
  group_by(ad_educ) %>%
  summarize(mean(matched),
            mean(r_postregturnout, na.rm=T),
            mean(evervoted_post))

dat %>% 
  filter(f_svy_yob_imp_ytgc<1990) %>%
  group_by(yt_educ) %>%
  summarize(mean(matched),
            mean(r_postregturnout, na.rm=T),
            mean(evervoted_post),
            mean(r_postturnout))

dat <- dat %>%
  mutate(pov_tile = ntile(local_poverty_posttreat, n=4))
dat %>%
  group_by(pov_tile) %>%
  summarize(mean(matched),
            mean(evervoted_post),
            mean(r_postturnout),
            mean(r_postregturnout, na.rm=T))

# joint significance test

summary(lm_robust(matched~factor(ra_site)+race+x_f_ad_nevmarr+x_f_ad_edgradhs+x_f_ad_working, data=dat, weights=f_wt_totcore98))
summary(lm_robust(matched~factor(ra_site)+race+x_f_ad_nevmarr+x_f_ad_edgradhs+x_f_ad_working+sex, data=dat, weights=f_wt_totcore98))



# Figure 2: match rates by group ####
a <- dat %>%
  group_by(matched, ra_group_factor) %>%
  summarize(baltimore=weighted.mean(ra_site==1, w=f_wt_totcore98, na.rm=T),
            boston=weighted.mean(ra_site==2, w=f_wt_totcore98, na.rm=T),
            chicago=weighted.mean(ra_site==3, w=f_wt_totcore98, na.rm=T),
            la=weighted.mean(ra_site==4, w=f_wt_totcore98, na.rm=T),
            nyc=weighted.mean(ra_site==5, w=f_wt_totcore98, na.rm=T),
            female=weighted.mean(sex=="F", w=f_wt_totcore98, na.rm=T),
            hisp=weighted.mean(x_f_ad_ethn_hisp==1, w=f_wt_totcore98, na.rm=T),
            black=weighted.mean(x_f_ad_race_black==1, w=f_wt_totcore98, na.rm=T),
            white=weighted.mean(x_f_ad_race_black==0&x_f_ad_race_other==0&x_f_ad_ethn_hisp==0, w=f_wt_totcore98, na.rm=T),
            nevermar=weighted.mean(x_f_ad_nevmarr==1, w=f_wt_totcore98, na.rm=T),
            hs=weighted.mean(x_f_ad_edgradhs==1&x_f_ad_edgradhs_miss==0, w=f_wt_totcore98, na.rm=T),
            work=weighted.mean(x_f_ad_working, w=f_wt_totcore98, na.rm=T)) %>%
  pivot_longer(baltimore:work, names_to="var", values_to="val") %>%
  mutate(var = recode(var, "baltimore"="Baltimore", 
                      "boston"="Boston", 
                      "chicago"="Chicago", 
                      "la"="Los Angeles",
                      "nyc"="NYC",
                      "female"="Female",
                      "black"="HH Black",
                      "hisp"="HH Hisp.",
                      "white"="HH White",
                      "nevermar"="HH Never Married",
                      "work"="HH Working",
                      "hs"="HH HS Grad")) %>%
  mutate(var = factor(var, levels=rev(c("Baltimore", "Boston", "Chicago", "Los Angeles",
                                        "NYC", "Female", "HH Hisp.", "HH Black", "HH White",
                                        "HH Never Married", "HH Working", "HH HS Grad")), 
                      ordered=TRUE)) %>%
  mutate(matched = case_when(matched==0~"Unmatched",
                             matched==1~"Matched")) %>%
  mutate(ra_group_factor = case_when(ra_group_factor=="control"~"Control",
                                     ra_group_factor=="experimental"~"Experimental",
                                     ra_group_factor=="section 8"~"Section 8")) %>%
  ggplot() +
  geom_bar(aes(y=var, x=val, group=matched, fill=matched), 
           stat="identity", position="dodge", color="black") + 
  theme_bw() + xlab("Proportion of Sample") + ylab("Variable") +
  scale_fill_manual(name="Match Status", 
                    #labels=c("Control", "Experimental"),
                    values=c("gray60", "black")) + 
  theme(text=element_text(size=15)) + 
  facet_wrap(~ra_group_factor) +
  ggtitle("Demographics of Matched and Unmatched Samples")
pdf(file="Figures/figure2.pdf", width=8, height=8)
a
dev.off()




# Appendix A1.2: match analyses: sex and marriage ####

dat %>%
  group_by(sex, age_group) %>%
  summarize(mean(matched))

dat %>%
  group_by(hrl1) %>%
  summarize(mean(matched))
summary(lm(matched~factor(hrl1), weights=f_wt_totcore98, data=dat %>% filter(sex=="F"&x_f_ad_nevmarr %in% c(0,1))))

dat %>%
  filter(age_group=="adult"&x_f_ad_nevmarr %in% c(0,1)) %>%
  group_by(x_f_ad_nevmarr, sex) %>%
  summarize(mean(matched), n())
summary(lm(matched~x_f_ad_nevmarr, weights=f_wt_totcore98, data=dat %>% filter(age_group=="adult"&sex=="F"&x_f_ad_nevmarr %in% c(0,1))))

dat %>%
  filter(age_group=="adult"&hrl1!=1) %>%
  group_by(ad_educ) %>%
  summarize(mean(matched), n())

dat %>%
  filter(f_svy_yob_imp_ytgc>1995) %>%
  group_by(sex) %>%
  summarize(mean(matched), n())

dat %>%
  group_by(sex, age_group) %>%
  summarize(mean(posterior, na.rm=T))

### 
# Appendix A5: distribution of outcome variables ####

a <- dat %>%
  ggplot() + 
  geom_histogram(aes(x=matched, y=..density.., weight=f_wt_totcore98,
                     group=age_group, fill=age_group), binwidth=.1, position="dodge") + 
  theme_bw() + xlab("Matched") + ylab("Density") + 
  theme(text=element_text(size=15)) + 
  scale_fill_discrete(name="Age Group", labels=c("Adult", "Older Youth", "Younger Youth")) + 
  scale_x_continuous(breaks=c(0,1))

b <- dat %>%
  ggplot() + 
  geom_histogram(aes(x=evervoted_post, y=..density.., weight=f_wt_totcore98,
                     group=age_group, fill=age_group), binwidth=.1, position="dodge") + 
  theme_bw() + xlab("Ever Voted") + ylab("Density") + 
  theme(text=element_text(size=15)) + 
  scale_fill_discrete(name="Age Group", labels=c("Adult", "Older Youth", "Younger Youth")) + 
  scale_x_continuous(breaks=c(0,1))

c <- dat %>%
  filter(matched==1) %>%
  ggplot() + 
  geom_histogram(aes(x=evervoted_post, y=..density.., weight=f_wt_totcore98,
                     group=age_group, fill=age_group), binwidth=.1, position="dodge") + 
  theme_bw() + xlab("Ever Voted") + ylab("Density") + 
  theme(text=element_text(size=15)) + 
  scale_fill_discrete(name="Age Group", labels=c("Adult", "Older Youth", "Younger Youth")) + 
  scale_x_continuous(breaks=c(0,1))

d <- dat %>%
  ggplot() + 
  geom_histogram(aes(x=r_postturnout, y=..density.., weight=f_wt_totcore98,
                     group=age_group, fill=age_group), binwidth=.1, position="dodge") + 
  theme_bw() + xlab("Turnout Rate") + ylab("Density") + 
  theme(text=element_text(size=15)) + 
  scale_fill_discrete(name="Age Group", labels=c("Adult", "Older Youth", "Younger Youth")) 

e <- dat %>%
  filter(matched==1) %>%
  ggplot() + 
  geom_histogram(aes(x=r_postturnout, y=..density.., weight=f_wt_totcore98,
                     group=age_group, fill=age_group), binwidth=.1, position="dodge") + 
  theme_bw() + xlab("Turnout Rate") + ylab("Density") + 
  theme(text=element_text(size=15)) + 
  scale_fill_discrete(name="Age Group", labels=c("Adult", "Older Youth", "Younger Youth")) 

f <- dat %>%
  ggplot() + 
  geom_histogram(aes(x=r_postregturnout, y=..density.., weight=f_wt_totcore98,
                     group=age_group, fill=age_group), binwidth=.1, position="dodge") + 
  theme_bw() + xlab("Turnout Rate- Registered") + ylab("Density") + 
  theme(text=element_text(size=15))+ 
  scale_fill_discrete(name="Age Group", labels=c("Adult", "Older Youth", "Younger Youth")) 
g <- dat %>%
  filter(matched==1) %>%
  ggplot() + 
  geom_histogram(aes(x=r_postregturnout, y=..density.., weight=f_wt_totcore98,
                     group=age_group, fill=age_group), binwidth=.1, position="dodge") + 
  theme_bw() + xlab("Turnout Rate- Registered") + ylab("Density") + 
  theme(text=element_text(size=15))+ 
  scale_fill_discrete(name="Age Group", labels=c("Adult", "Older Youth", "Younger Youth")) 


pdf("Figures/suppfigure_a5_p1.pdf")
a
dev.off()
pdf("Figures/suppfigure_a5_p2.pdf")
b
dev.off()
pdf("Figures/suppfigure_a5_p3.pdf")
c
dev.off()
pdf("Figures/suppfigure_a5_p4.pdf")
d
dev.off()
pdf("Figures/suppfigure_a5_p5.pdf")
e
dev.off()
pdf("Figures/suppfigure_a5_p6.pdf")
f
dev.off()
pdf("Figures/suppfigure_a5_p7.pdf")
g
dev.off()


#
# Table 1: turnout analyses ####

# means 
dat %>%
  group_by(ra_group_factor) %>%
  summarize(matched = mean(matched), 
            evervote = mean(evervoted_post),
            voterate = mean(r_postturnout),
            votepostreg = mean(r_postregturnout, na.rm=T))

# Appendix A3: full table results ####
## table 1: predict matching #
f1 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
f2 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
f3 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))

mod1 <- lm_robust(matched~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod3 <- lm_robust(matched~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod5 <- lm_robust(matched~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod1)
summary(mod3)
summary(mod5)

huxreg("Adults"=mod1, "Age 13-19 at baseline"=mod3, "Age 0-12 at baseline"=mod5, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Experimental group" = "ra_group_factorexperimental",
                 "Section 8 group" = "ra_group_factorsection 8",
                 "Site: Boston"="factor(ra_site)2",
                 "Site: Chicago"="factor(ra_site)3",
                 "Site: Los Angeles"="factor(ra_site)4",
                 "Site: New York City"="factor(ra_site)5")) %>%
  insert_row(c("Omitted site: Baltimore", "", "", "", "", "", ""), after=3) %>%
  set_caption("Voter registration/match rate by MTO treatment") %>%
  print_latex()

mod2 <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod4 <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod6 <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod2)
summary(mod4)
summary(mod6)

huxreg("Adults"=mod2, "Age 13-19 at baseline"=mod4, "Age 0-12 at baseline"=mod6, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Experimental group" = "ra_group_factorexperimental",
                 "Section 8 group" = "ra_group_factorsection 8",
                 "Site: Baltimore"="x_f_site_balt",
                 "Site: Boston"="x_f_site_bos",
                 "Site: Chicago"="x_f_site_chi",
                 "Site: Los Angeles"="x_f_site_la",
                 "Adult age 36-40"="x_f_ad_36_40",
                 "Adult age 41-45"="x_f_ad_41_45",
                 "Adult age 46-50"="x_f_ad_46_50",
                 "Adult educ: GED"="x_f_ad_edged",
                 "Adult educ: HS Grad"="x_f_ad_edgradhs",
                 "Adult educ: Missing"="x_f_ad_edgradhs_miss",
                 "Adult educ: In School"="x_f_ad_edinsch",
                 "Adult ethnic: Hispanic"="x_f_ad_ethn_hisp",
                 "Adult race: Black"="x_f_ad_race_black",
                 "Adult race: other"="x_f_ad_race_other",
                 "Adult never married"="x_f_ad_nevmarr",
                 "Adult <18 at first child"="x_f_ad_parentu18",
                 "Adult working"="x_f_ad_working",
                 "HH on AFDC"="x_f_hh_afdc",
                 "HH has car"="x_f_hh_car",
                 "HH member with disability"="x_f_hh_disabl",
                 "No teens in HH"="x_f_hh_noteens",
                 "HH size <=2"="x_f_hh_size2",
                 "HH size 3"="x_f_hh_size3",
                 "HH size 4"="x_f_hh_size4",
                 "HH cont. recent crime victim"="x_f_hh_victim",
                 "HH head in nbhd 5+ years"="x_f_hood_5y",
                 "HH head chats w/ nbs often"="x_f_hood_chat",
                 "HH head would tell on nbhd kid"="x_f_hood_nbrkid",
                 "HH head no family in nbhd"="x_f_hood_nofamily",
                 "HH head no friends in nbhd"="x_f_hood_nofriend",
                 "HH head feels nbhd unsafe at night"="x_f_hood_unsafenit",
                 "HH head very dissat w. nbhd"="x_f_hood_verydissat",
                 "HH head sure can find apt"="x_f_hous_fndapt",
                 "HH head moved >3x in 5 yrs"="x_f_hous_mov3tm",
                 "Top moving reason: drugs/crime"="x_f_hous_movdrgs",
                 "Top moving reason: schools"="x_f_hous_movschl",
                 "HH head applied for s8 before"="x_f_hous_sec8bef")) %>%
  insert_row(c("Omitted site: New York City", "", "", "", "", "", ""), after=3) %>%
  set_caption("Voter registration/match rate by MTO treatment") %>%
  print_latex()

## table 2: voting rate posttreatment
f1 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
f2 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
f3 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))

mod1 <- lm_robust(r_postturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod3 <- lm_robust(r_postturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod5 <- lm_robust(r_postturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod1)
summary(mod3)
summary(mod5)

huxreg("Adults"=mod1, "Age 13-19 at baseline"=mod3, "Age 0-12 at baseline"=mod5, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Experimental group" = "ra_group_factorexperimental",
                 "Section 8 group" = "ra_group_factorsection 8",
                 "Site: Boston"="factor(ra_site)2",
                 "Site: Chicago"="factor(ra_site)3",
                 "Site: Los Angeles"="factor(ra_site)4",
                 "Site: New York City"="factor(ra_site)5")) %>%
  insert_row(c("Omitted site: Baltimore", "", "", "", "", "", ""), after=3) %>%
  set_caption("Post-treatment turnout rate by MTO treatment") %>%
  print_latex()


mod2 <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod4 <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod6 <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod2)
summary(mod4)
summary(mod6)

huxreg("Adults"=mod2, "Age 13-19 at baseline"=mod4, "Age 0-12 at baseline"=mod6, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Experimental group" = "ra_group_factorexperimental",
                 "Section 8 group" = "ra_group_factorsection 8",
                 "Site: Baltimore"="x_f_site_balt",
                 "Site: Boston"="x_f_site_bos",
                 "Site: Chicago"="x_f_site_chi",
                 "Site: Los Angeles"="x_f_site_la",
                 "Adult age 36-40"="x_f_ad_36_40",
                 "Adult age 41-45"="x_f_ad_41_45",
                 "Adult age 46-50"="x_f_ad_46_50",
                 "Adult educ: GED"="x_f_ad_edged",
                 "Adult educ: HS Grad"="x_f_ad_edgradhs",
                 "Adult educ: Missing"="x_f_ad_edgradhs_miss",
                 "Adult educ: In School"="x_f_ad_edinsch",
                 "Adult ethnic: Hispanic"="x_f_ad_ethn_hisp",
                 "Adult race: Black"="x_f_ad_race_black",
                 "Adult race: other"="x_f_ad_race_other",
                 "Adult never married"="x_f_ad_nevmarr",
                 "Adult <18 at first child"="x_f_ad_parentu18",
                 "Adult working"="x_f_ad_working",
                 "HH on AFDC"="x_f_hh_afdc",
                 "HH has car"="x_f_hh_car",
                 "HH member with disability"="x_f_hh_disabl",
                 "No teens in HH"="x_f_hh_noteens",
                 "HH size <=2"="x_f_hh_size2",
                 "HH size 3"="x_f_hh_size3",
                 "HH size 4"="x_f_hh_size4",
                 "HH cont. recent crime victim"="x_f_hh_victim",
                 "HH head in nbhd 5+ years"="x_f_hood_5y",
                 "HH head chats w/ nbs often"="x_f_hood_chat",
                 "HH head would tell on nbhd kid"="x_f_hood_nbrkid",
                 "HH head no family in nbhd"="x_f_hood_nofamily",
                 "HH head no friends in nbhd"="x_f_hood_nofriend",
                 "HH head feels nbhd unsafe at night"="x_f_hood_unsafenit",
                 "HH head very dissat w. nbhd"="x_f_hood_verydissat",
                 "HH head sure can find apt"="x_f_hous_fndapt",
                 "HH head moved >3x in 5 yrs"="x_f_hous_mov3tm",
                 "Top moving reason: drugs/crime"="x_f_hous_movdrgs",
                 "Top moving reason: schools"="x_f_hous_movschl",
                 "HH head applied for s8 before"="x_f_hous_sec8bef")) %>%
  insert_row(c("Omitted site: New York City", "", "", "", "", "", ""), after=3) %>%
  set_caption("Post-treatment turnout rate by MTO treatment") %>%
  print_latex()

## table 3: predict ever voted posttreatment
f1 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
f2 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
f3 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))

mod1 <- lm_robust(evervoted_post~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod3 <- lm_robust(evervoted_post~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod5 <- lm_robust(evervoted_post~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod1)
summary(mod3)
summary(mod5)

huxreg("Adults"=mod1, "Age 13-19 at baseline"=mod3, "Age 0-12 at baseline"=mod5, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Experimental group" = "ra_group_factorexperimental",
                 "Section 8 group" = "ra_group_factorsection 8",
                 "Site: Boston"="factor(ra_site)2",
                 "Site: Chicago"="factor(ra_site)3",
                 "Site: Los Angeles"="factor(ra_site)4",
                 "Site: New York City"="factor(ra_site)5")) %>%
  insert_row(c("Omitted site: Baltimore", "", "", "", "", "", ""), after=3) %>%
  set_caption("Voting at least once post-treatment by MTO treatment") %>%
  print_latex()

mod2 <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod4 <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod6 <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod2)
summary(mod4)
summary(mod6)

huxreg("Adults"=mod2, "Age 13-19 at baseline"=mod4, "Age 0-12 at baseline"=mod6, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Experimental group" = "ra_group_factorexperimental",
                 "Section 8 group" = "ra_group_factorsection 8",
                 "Site: Baltimore"="x_f_site_balt",
                 "Site: Boston"="x_f_site_bos",
                 "Site: Chicago"="x_f_site_chi",
                 "Site: Los Angeles"="x_f_site_la",
                 "Adult age 36-40"="x_f_ad_36_40",
                 "Adult age 41-45"="x_f_ad_41_45",
                 "Adult age 46-50"="x_f_ad_46_50",
                 "Adult educ: GED"="x_f_ad_edged",
                 "Adult educ: HS Grad"="x_f_ad_edgradhs",
                 "Adult educ: Missing"="x_f_ad_edgradhs_miss",
                 "Adult educ: In School"="x_f_ad_edinsch",
                 "Adult ethnic: Hispanic"="x_f_ad_ethn_hisp",
                 "Adult race: Black"="x_f_ad_race_black",
                 "Adult race: other"="x_f_ad_race_other",
                 "Adult never married"="x_f_ad_nevmarr",
                 "Adult <18 at first child"="x_f_ad_parentu18",
                 "Adult working"="x_f_ad_working",
                 "HH on AFDC"="x_f_hh_afdc",
                 "HH has car"="x_f_hh_car",
                 "HH member with disability"="x_f_hh_disabl",
                 "No teens in HH"="x_f_hh_noteens",
                 "HH size <=2"="x_f_hh_size2",
                 "HH size 3"="x_f_hh_size3",
                 "HH size 4"="x_f_hh_size4",
                 "HH cont. recent crime victim"="x_f_hh_victim",
                 "HH head in nbhd 5+ years"="x_f_hood_5y",
                 "HH head chats w/ nbs often"="x_f_hood_chat",
                 "HH head would tell on nbhd kid"="x_f_hood_nbrkid",
                 "HH head no family in nbhd"="x_f_hood_nofamily",
                 "HH head no friends in nbhd"="x_f_hood_nofriend",
                 "HH head feels nbhd unsafe at night"="x_f_hood_unsafenit",
                 "HH head very dissat w. nbhd"="x_f_hood_verydissat",
                 "HH head sure can find apt"="x_f_hous_fndapt",
                 "HH head moved >3x in 5 yrs"="x_f_hous_mov3tm",
                 "Top moving reason: drugs/crime"="x_f_hous_movdrgs",
                 "Top moving reason: schools"="x_f_hous_movschl",
                 "HH head applied for s8 before"="x_f_hous_sec8bef")) %>%
  insert_row(c("Omitted site: New York City", "", "", "", "", "", ""), after=3) %>%
  set_caption("Voting at least once post-treatment by MTO treatment") %>%
  print_latex()


## table 4: voting rate postregistration
f1 <- as.formula(paste0("r_postregturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
f2 <- as.formula(paste0("r_postregturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
f3 <- as.formula(paste0("r_postregturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))

mod1 <- lm_robust(r_postregturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod3 <- lm_robust(r_postregturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod5 <- lm_robust(r_postregturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod1)
summary(mod3)
summary(mod5)

huxreg("Adults"=mod1, "Age 13-19 at baseline"=mod3, "Age 0-12 at baseline"=mod5, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Experimental group" = "ra_group_factorexperimental",
                 "Section 8 group" = "ra_group_factorsection 8",
                 "Site: Boston"="factor(ra_site)2",
                 "Site: Chicago"="factor(ra_site)3",
                 "Site: Los Angeles"="factor(ra_site)4",
                 "Site: New York City"="factor(ra_site)5")) %>%
  insert_row(c("Omitted site: Baltimore", "", "", "", "", "", ""), after=3) %>%
  set_caption("Turnout rate by MTO treatment, registered voters only") %>%
  print_latex()

mod2 <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod4 <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod6 <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod2)
summary(mod4)
summary(mod6)

huxreg("Adults"=mod2, "Age 13-19 at baseline"=mod4, "Age 0-12 at baseline"=mod6, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Experimental group" = "ra_group_factorexperimental",
                 "Section 8 group" = "ra_group_factorsection 8",
                 "Site: Baltimore"="x_f_site_balt",
                 "Site: Boston"="x_f_site_bos",
                 "Site: Chicago"="x_f_site_chi",
                 "Site: Los Angeles"="x_f_site_la",
                 "Adult age 36-40"="x_f_ad_36_40",
                 "Adult age 41-45"="x_f_ad_41_45",
                 "Adult age 46-50"="x_f_ad_46_50",
                 "Adult educ: GED"="x_f_ad_edged",
                 "Adult educ: HS Grad"="x_f_ad_edgradhs",
                 "Adult educ: Missing"="x_f_ad_edgradhs_miss",
                 "Adult educ: In School"="x_f_ad_edinsch",
                 "Adult ethnic: Hispanic"="x_f_ad_ethn_hisp",
                 "Adult race: Black"="x_f_ad_race_black",
                 "Adult race: other"="x_f_ad_race_other",
                 "Adult never married"="x_f_ad_nevmarr",
                 "Adult <18 at first child"="x_f_ad_parentu18",
                 "Adult working"="x_f_ad_working",
                 "HH on AFDC"="x_f_hh_afdc",
                 "HH has car"="x_f_hh_car",
                 "HH member with disability"="x_f_hh_disabl",
                 "No teens in HH"="x_f_hh_noteens",
                 "HH size <=2"="x_f_hh_size2",
                 "HH size 3"="x_f_hh_size3",
                 "HH size 4"="x_f_hh_size4",
                 "HH cont. recent crime victim"="x_f_hh_victim",
                 "HH head in nbhd 5+ years"="x_f_hood_5y",
                 "HH head chats w/ nbs often"="x_f_hood_chat",
                 "HH head would tell on nbhd kid"="x_f_hood_nbrkid",
                 "HH head no family in nbhd"="x_f_hood_nofamily",
                 "HH head no friends in nbhd"="x_f_hood_nofriend",
                 "HH head feels nbhd unsafe at night"="x_f_hood_unsafenit",
                 "HH head very dissat w. nbhd"="x_f_hood_verydissat",
                 "HH head sure can find apt"="x_f_hous_fndapt",
                 "HH head moved >3x in 5 yrs"="x_f_hous_mov3tm",
                 "Top moving reason: drugs/crime"="x_f_hous_movdrgs",
                 "Top moving reason: schools"="x_f_hous_movschl",
                 "HH head applied for s8 before"="x_f_hous_sec8bef")) %>%
  insert_row(c("Omitted site: New York City", "", "", "", "", "", ""), after=3) %>%
  set_caption("Turnout rate by MTO treatment, registered voters only") %>%
  print_latex()


# Figure 4: turnout effects plot ####

# repeat regressions for each subgroup, storing results
outs <- c("matched", "r_postturnout", "evervoted_post", "r_postregturnout")
tab <- data.frame("est"=NA, "se"=NA, "group"=NA, "model"=NA, "outcome"=NA, "cond"=NA)

for(i in 1:4){
  f1 <- as.formula(paste0(outs[i], "~", "ra_group_factor+factor(ra_site)"))
  
  mod1 <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  f1 <- as.formula(paste0(outs[i], "~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0(outs[i], "~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0(outs[i], "~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab1 <- data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                             mod2$coefficients["ra_group_factorexperimental"], 
                             mod3$coefficients["ra_group_factorexperimental"],
                             mod1c$coefficients["ra_group_factorexperimental"], 
                             mod2c$coefficients["ra_group_factorexperimental"], 
                             mod3c$coefficients["ra_group_factorexperimental"],
                             mod1$coefficients["ra_group_factorsection 8"], 
                             mod2$coefficients["ra_group_factorsection 8"], 
                             mod3$coefficients["ra_group_factorsection 8"],
                             mod1c$coefficients["ra_group_factorsection 8"],
                             mod2c$coefficients["ra_group_factorsection 8"],
                             mod3c$coefficients["ra_group_factorsection 8"]),
                     "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                            mod2$std.error["ra_group_factorexperimental"], 
                            mod3$std.error["ra_group_factorexperimental"],
                            mod1c$std.error["ra_group_factorexperimental"], 
                            mod2c$std.error["ra_group_factorexperimental"], 
                            mod3c$std.error["ra_group_factorexperimental"],
                            mod1$std.error["ra_group_factorsection 8"], 
                            mod2$std.error["ra_group_factorsection 8"], 
                            mod3$std.error["ra_group_factorsection 8"],
                            mod1c$std.error["ra_group_factorsection 8"],
                            mod2c$std.error["ra_group_factorsection 8"],
                            mod3c$std.error["ra_group_factorsection 8"]))
  tab1$outcome <- outs[i]
  tab1$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  tab1$group <- c(rep(c("adult", "teen", "kid"), 4))
  tab1$cond <- c(rep("exp", 6), rep("s8", 6))
  tab <- rbind(tab, tab1)
}

tab <- tab[-1,]

a <- tab %>% 
  mutate(ebmin = est - 1.96*se,
         ebmax = est + 1.96*se) %>%
  mutate(group = recode(group, adult="Adult", teen="Baseline Age 13-19", kid="Baseline Age 0-12")) %>%
  mutate(group = factor(group, levels=c("Adult", "Baseline Age 13-19", "Baseline Age 0-12"), ordered=TRUE)) %>%
  mutate(outcome = recode(outcome, matched="Registered", evervoted_post="Ever Voted Post-treatment", 
                          r_postturnout="Voting Rate Post-treatment", 
                          r_postregturnout="Voting Rate Given Registration")) %>%
  mutate(outcome = factor(outcome, levels=rev(c("Registered",  "Ever Voted Post-treatment", 
                                                "Voting Rate Post-treatment", "Voting Rate Given Registration")),
                          ordered=TRUE)) %>%
  filter(cond=="exp"&model=="simple") %>%
  ggplot() +
  geom_point(aes(y=outcome, x=est)) +
  geom_errorbarh(aes(y=outcome, xmin=ebmin, xmax=ebmax)) +
  geom_vline(xintercept=0, linetype=2) +
  facet_wrap(~group, nrow=3) +
  theme_bw() +
  theme(text=element_text(size=15)) +
  ylab("Outcome") + 
  xlab("Effect of Low-Poverty Treatment")

pdf(file="Figures/figure4a.pdf", width=6, height=7)
a
dev.off()

b <- tab %>% 
  mutate(ebmin = est - 1.96*se,
         ebmax = est + 1.96*se) %>%
  mutate(group = recode(group, adult="Adult", teen="Baseline Age 13-19", kid="Baseline Age 0-12")) %>%
  mutate(group = factor(group, levels=c("Adult", "Baseline Age 13-19", "Baseline Age 0-12"), ordered=TRUE)) %>%
  mutate(outcome = recode(outcome, matched="Registered", evervoted_post="Ever Voted Post-treatment", 
                          r_postturnout="Voting Rate Post-treatment", 
                          r_postregturnout="Voting Rate Given Registration")) %>%
  mutate(outcome = factor(outcome, levels=rev(c("Registered",  "Ever Voted Post-treatment", 
                                                "Voting Rate Post-treatment", "Voting Rate Given Registration")),
                          ordered=TRUE)) %>%
  filter(cond=="s8"&model=="simple") %>%
  ggplot() +
  geom_point(aes(y=outcome, x=est)) +
  geom_errorbarh(aes(y=outcome, xmin=ebmin, xmax=ebmax)) +
  geom_vline(xintercept=0, linetype=2) +
  facet_wrap(~group, nrow=3) +
  theme_bw() +
  theme(text=element_text(size=15), axis.text.y=element_blank()) +
  ylab(" ") + 
  xlab("Effect of Section 8 Treatment")
pdf(file="Figures/figure4b.pdf", width=5, height=7)
b
dev.off()


#
# Supplemental: turnout by compliance stage ####

# look at data among people above and below thresholds on neighborhood composition variables
dat %>% 
  mutate(low_pov = local_poverty_posttreat<.20) %>%
  group_by(age_group, ra_group_factor, f_svy_cmove, low_pov, 
           f_c9010t_pcolldeg_dw>.22&f_c9010t_pemp_dw>.88&f_c9010t_pminorty_dw<.78
  ) %>%
  summarize(n=n(),
            ev = weighted.mean(evervoted_post, w=f_wt_totcore98, na.rm=T)) %>%
  filter(!is.na(low_pov)) %>%
  filter(age_group=="young_kid") %>%
  View()
dat <- dat %>%   mutate(low_pov = local_poverty_posttreat<.20)

# average number of moves
dat %>%
  filter(age_group=="young_kid") %>%
  group_by(ra_group_factor) %>%
  summarize(mean(fam_moves, na.rm=T))

#control kids who still move to low-poverty neighborhoods
weighted.mean(dat$low_pov[dat$age_group=="young_kid"&dat$ra_group_factor=="control"]==TRUE,
              w=dat$f_wt_totcore98[dat$age_group=="young_kid"&dat$ra_group_factor=="control"], na.rm=T)

#exp kids who moved
weighted.mean(dat$f_svy_cmove[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"]==1,
              w=dat$f_wt_totcore98[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"], na.rm=T)
#exp kids who moved to low-poverty neighborhoods
weighted.mean(dat$low_pov[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"]==TRUE,
              w=dat$f_wt_totcore98[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"], na.rm=T)
#voted: noncomplier (did not move)
weighted.mean(dat$evervoted_post[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"&dat$f_svy_cmove==0],
              w=dat$f_wt_totcore98[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"&dat$f_svy_cmove==0], na.rm=T)
#voted: complier (moved)
weighted.mean(dat$evervoted_post[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"&dat$f_svy_cmove==1],
              w=dat$f_wt_totcore98[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"&dat$f_svy_cmove==1], na.rm=T)
#voted: low-poverty nbhd
weighted.mean(dat$evervoted_post[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"&dat$low_pov==TRUE&dat$f_svy_cmove==1],
              w=dat$f_wt_totcore98[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"&dat$low_pov==TRUE&dat$f_svy_cmove==1], na.rm=T)
#voted: low-poverty nbhd + no moves since assignment year
weighted.mean(dat$evervoted_post[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"&dat$low_pov==TRUE&dat$f_svy_cmove==1&dat$fam_moves==0],
              w=dat$f_wt_totcore98[dat$age_group=="young_kid"&dat$ra_group_factor=="experimental"&dat$low_pov==TRUE&dat$f_svy_cmove==1&dat$fam_moves==0], na.rm=T)

#

## Figure 5: subgroup analyses ####

dat_full <- dat

# create_dfs 
boys <- dat %>% filter(sex=="M")
girls <- dat %>% filter(sex=="F")
black <- dat %>% filter(race=="black")
latino <- dat %>% filter(race=="hisp")
other <- dat %>% filter(race=="other")
baltimore <- dat %>% filter(ra_site==1)
boston <- dat %>% filter(ra_site==2)
chicago <- dat %>% filter(ra_site==3)
la <- dat %>% filter(ra_site==4)
nyc <- dat %>% filter(ra_site==5)

# Figure 5 cont.: non-site groups ####
dfs <- list(boys, girls, black, latino, other)
names <- c("boys", "girls", "black", "latino", "other")
tab <- data.frame("est"=NA, "se"=NA)

for(i in 1:length(dfs)){
  dat <- as.data.frame(dfs[i]  )
  
  f1 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1 <- lm_robust(matched~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(matched~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(matched~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab1 <- data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                             mod2$coefficients["ra_group_factorexperimental"], 
                             mod3$coefficients["ra_group_factorexperimental"],
                             mod1c$coefficients["ra_group_factorexperimental"], 
                             mod2c$coefficients["ra_group_factorexperimental"], 
                             mod3c$coefficients["ra_group_factorexperimental"],
                             mod1$coefficients["ra_group_factorsection 8"], 
                             mod2$coefficients["ra_group_factorsection 8"], 
                             mod3$coefficients["ra_group_factorsection 8"],
                             mod1c$coefficients["ra_group_factorsection 8"],
                             mod2c$coefficients["ra_group_factorsection 8"],
                             mod3c$coefficients["ra_group_factorsection 8"]),
                     "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                            mod2$std.error["ra_group_factorexperimental"], 
                            mod3$std.error["ra_group_factorexperimental"],
                            mod1c$std.error["ra_group_factorexperimental"], 
                            mod2c$std.error["ra_group_factorexperimental"], 
                            mod3c$std.error["ra_group_factorexperimental"],
                            mod1$std.error["ra_group_factorsection 8"], 
                            mod2$std.error["ra_group_factorsection 8"], 
                            mod3$std.error["ra_group_factorsection 8"],
                            mod1c$std.error["ra_group_factorsection 8"],
                            mod2c$std.error["ra_group_factorsection 8"],
                            mod3c$std.error["ra_group_factorsection 8"]))
  tab1$group <- names[i]
  tab1$outcome <- "matched"
  tab1$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  
  ## table 3: predict voted postreg
  f1 <- as.formula(paste0("r_postregturnout ~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0("r_postregturnout ~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0("r_postregturnout ~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1 <- lm_robust(r_postregturnout ~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(r_postregturnout ~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(r_postregturnout ~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab2 <- data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                             mod2$coefficients["ra_group_factorexperimental"], 
                             mod3$coefficients["ra_group_factorexperimental"],
                             mod1c$coefficients["ra_group_factorexperimental"], 
                             mod2c$coefficients["ra_group_factorexperimental"], 
                             mod3c$coefficients["ra_group_factorexperimental"],
                             mod1$coefficients["ra_group_factorsection 8"], 
                             mod2$coefficients["ra_group_factorsection 8"], 
                             mod3$coefficients["ra_group_factorsection 8"],
                             mod1c$coefficients["ra_group_factorsection 8"],
                             mod2c$coefficients["ra_group_factorsection 8"],
                             mod3c$coefficients["ra_group_factorsection 8"]),
                     "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                            mod2$std.error["ra_group_factorexperimental"], 
                            mod3$std.error["ra_group_factorexperimental"],
                            mod1c$std.error["ra_group_factorexperimental"], 
                            mod2c$std.error["ra_group_factorexperimental"], 
                            mod3c$std.error["ra_group_factorexperimental"],
                            mod1$std.error["ra_group_factorsection 8"], 
                            mod2$std.error["ra_group_factorsection 8"], 
                            mod3$std.error["ra_group_factorsection 8"],
                            mod1c$std.error["ra_group_factorsection 8"],
                            mod2c$std.error["ra_group_factorsection 8"],
                            mod3c$std.error["ra_group_factorsection 8"]))
  tab2$group <- names[i]
  tab2$outcome <- "postreg"
  tab2$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  
  
  ## table 2: voting rate posttreatment
  f1 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1 <- lm_robust(r_postturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(r_postturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(r_postturnout~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab3 <-  data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                              mod2$coefficients["ra_group_factorexperimental"], 
                              mod3$coefficients["ra_group_factorexperimental"],
                              mod1c$coefficients["ra_group_factorexperimental"], 
                              mod2c$coefficients["ra_group_factorexperimental"], 
                              mod3c$coefficients["ra_group_factorexperimental"],
                              mod1$coefficients["ra_group_factorsection 8"], 
                              mod2$coefficients["ra_group_factorsection 8"], 
                              mod3$coefficients["ra_group_factorsection 8"],
                              mod1c$coefficients["ra_group_factorsection 8"],
                              mod2c$coefficients["ra_group_factorsection 8"],
                              mod3c$coefficients["ra_group_factorsection 8"]),
                      "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                             mod2$std.error["ra_group_factorexperimental"], 
                             mod3$std.error["ra_group_factorexperimental"],
                             mod1c$std.error["ra_group_factorexperimental"], 
                             mod2c$std.error["ra_group_factorexperimental"], 
                             mod3c$std.error["ra_group_factorexperimental"],
                             mod1$std.error["ra_group_factorsection 8"], 
                             mod2$std.error["ra_group_factorsection 8"], 
                             mod3$std.error["ra_group_factorsection 8"],
                             mod1c$std.error["ra_group_factorsection 8"],
                             mod2c$std.error["ra_group_factorsection 8"],
                             mod3c$std.error["ra_group_factorsection 8"]))
  tab3$group <- names[i]
  tab3$outcome <- "voteprop"
  tab3$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  
  ##ever voted posttreatment
  f1 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1 <- lm_robust(evervoted_post~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(evervoted_post~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(evervoted_post~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab4 <-  data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                              mod2$coefficients["ra_group_factorexperimental"], 
                              mod3$coefficients["ra_group_factorexperimental"],
                              mod1c$coefficients["ra_group_factorexperimental"], 
                              mod2c$coefficients["ra_group_factorexperimental"], 
                              mod3c$coefficients["ra_group_factorexperimental"],
                              mod1$coefficients["ra_group_factorsection 8"], 
                              mod2$coefficients["ra_group_factorsection 8"], 
                              mod3$coefficients["ra_group_factorsection 8"],
                              mod1c$coefficients["ra_group_factorsection 8"],
                              mod2c$coefficients["ra_group_factorsection 8"],
                              mod3c$coefficients["ra_group_factorsection 8"]),
                      "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                             mod2$std.error["ra_group_factorexperimental"], 
                             mod3$std.error["ra_group_factorexperimental"],
                             mod1c$std.error["ra_group_factorexperimental"], 
                             mod2c$std.error["ra_group_factorexperimental"], 
                             mod3c$std.error["ra_group_factorexperimental"],
                             mod1$std.error["ra_group_factorsection 8"], 
                             mod2$std.error["ra_group_factorsection 8"], 
                             mod3$std.error["ra_group_factorsection 8"],
                             mod1c$std.error["ra_group_factorsection 8"],
                             mod2c$std.error["ra_group_factorsection 8"],
                             mod3c$std.error["ra_group_factorsection 8"]))
  tab4$group <- names[i]
  tab4$outcome <- "evervoted"
  tab4$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  
  tab <- bind_rows(tab, tab1, tab2, tab3, tab4)
}


# Figure 5 cont.: site groups ####

dfs <- list(baltimore, boston, chicago, la, nyc)
names <- c("baltimore", "boston", "chicago", "la", "nyc")
covs_ad <- covs_ad[!(grepl("x_f_site_", covs_ad))]
covs_old <- covs_old[!(grepl("x_f_site_", covs_old))]
covs_young <- covs_young[!(grepl("x_f_site_", covs_young))]

for(i in 1:length(dfs)){
  dat <- as.data.frame(dfs[i]  )
  
  f1 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0("matched~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1 <- lm_robust(matched~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(matched~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(matched~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab1 <- data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                             mod2$coefficients["ra_group_factorexperimental"], 
                             mod3$coefficients["ra_group_factorexperimental"],
                             mod1c$coefficients["ra_group_factorexperimental"], 
                             mod2c$coefficients["ra_group_factorexperimental"], 
                             mod3c$coefficients["ra_group_factorexperimental"],
                             mod1$coefficients["ra_group_factorsection 8"], 
                             mod2$coefficients["ra_group_factorsection 8"], 
                             mod3$coefficients["ra_group_factorsection 8"],
                             mod1c$coefficients["ra_group_factorsection 8"],
                             mod2c$coefficients["ra_group_factorsection 8"],
                             mod3c$coefficients["ra_group_factorsection 8"]),
                     "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                            mod2$std.error["ra_group_factorexperimental"], 
                            mod3$std.error["ra_group_factorexperimental"],
                            mod1c$std.error["ra_group_factorexperimental"], 
                            mod2c$std.error["ra_group_factorexperimental"], 
                            mod3c$std.error["ra_group_factorexperimental"],
                            mod1$std.error["ra_group_factorsection 8"], 
                            mod2$std.error["ra_group_factorsection 8"], 
                            mod3$std.error["ra_group_factorsection 8"],
                            mod1c$std.error["ra_group_factorsection 8"],
                            mod2c$std.error["ra_group_factorsection 8"],
                            mod3c$std.error["ra_group_factorsection 8"]))
  tab1$group <- names[i]
  tab1$outcome <- "matched"
  tab1$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  
  ## table 3: predict voted postreg
  f1 <- as.formula(paste0("r_postregturnout ~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0("r_postregturnout ~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0("r_postregturnout ~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1 <- lm_robust(r_postregturnout ~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(r_postregturnout ~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(r_postregturnout ~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab2 <- data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                             mod2$coefficients["ra_group_factorexperimental"], 
                             mod3$coefficients["ra_group_factorexperimental"],
                             mod1c$coefficients["ra_group_factorexperimental"], 
                             mod2c$coefficients["ra_group_factorexperimental"], 
                             mod3c$coefficients["ra_group_factorexperimental"],
                             mod1$coefficients["ra_group_factorsection 8"], 
                             mod2$coefficients["ra_group_factorsection 8"], 
                             mod3$coefficients["ra_group_factorsection 8"],
                             mod1c$coefficients["ra_group_factorsection 8"],
                             mod2c$coefficients["ra_group_factorsection 8"],
                             mod3c$coefficients["ra_group_factorsection 8"]),
                     "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                            mod2$std.error["ra_group_factorexperimental"], 
                            mod3$std.error["ra_group_factorexperimental"],
                            mod1c$std.error["ra_group_factorexperimental"], 
                            mod2c$std.error["ra_group_factorexperimental"], 
                            mod3c$std.error["ra_group_factorexperimental"],
                            mod1$std.error["ra_group_factorsection 8"], 
                            mod2$std.error["ra_group_factorsection 8"], 
                            mod3$std.error["ra_group_factorsection 8"],
                            mod1c$std.error["ra_group_factorsection 8"],
                            mod2c$std.error["ra_group_factorsection 8"],
                            mod3c$std.error["ra_group_factorsection 8"]))
  
  tab2$group <- names[i]
  tab2$outcome <- "postreg"
  tab2$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  
  
  ## table 2: voting rate posttreatment
  f1 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0("r_postturnout~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1 <- lm_robust(r_postturnout~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(r_postturnout~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(r_postturnout~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab3 <- data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                             mod2$coefficients["ra_group_factorexperimental"], 
                             mod3$coefficients["ra_group_factorexperimental"],
                             mod1c$coefficients["ra_group_factorexperimental"], 
                             mod2c$coefficients["ra_group_factorexperimental"], 
                             mod3c$coefficients["ra_group_factorexperimental"],
                             mod1$coefficients["ra_group_factorsection 8"], 
                             mod2$coefficients["ra_group_factorsection 8"], 
                             mod3$coefficients["ra_group_factorsection 8"],
                             mod1c$coefficients["ra_group_factorsection 8"],
                             mod2c$coefficients["ra_group_factorsection 8"],
                             mod3c$coefficients["ra_group_factorsection 8"]),
                     "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                            mod2$std.error["ra_group_factorexperimental"], 
                            mod3$std.error["ra_group_factorexperimental"],
                            mod1c$std.error["ra_group_factorexperimental"], 
                            mod2c$std.error["ra_group_factorexperimental"], 
                            mod3c$std.error["ra_group_factorexperimental"],
                            mod1$std.error["ra_group_factorsection 8"], 
                            mod2$std.error["ra_group_factorsection 8"], 
                            mod3$std.error["ra_group_factorsection 8"],
                            mod1c$std.error["ra_group_factorsection 8"],
                            mod2c$std.error["ra_group_factorsection 8"],
                            mod3c$std.error["ra_group_factorsection 8"]))
  
  tab3$group <- names[i]
  tab3$outcome <- "voteprop"
  tab3$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  
  ## ever voted posttreatment
  f1 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f2 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  f3 <- as.formula(paste0("evervoted_post~", paste0(covs_ad, sep="+", collapse=""), "ra_group_factor"))
  
  mod1 <- lm_robust(evervoted_post~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2 <- lm_robust(evervoted_post~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3 <- lm_robust(evervoted_post~ra_group_factor, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  mod1c <- lm_robust(f1, weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
  mod2c <- lm_robust(f2, weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
  mod3c <- lm_robust(f3, weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
  
  tab4 <- data.frame("est"=c(mod1$coefficients["ra_group_factorexperimental"], 
                             mod2$coefficients["ra_group_factorexperimental"], 
                             mod3$coefficients["ra_group_factorexperimental"],
                             mod1c$coefficients["ra_group_factorexperimental"], 
                             mod2c$coefficients["ra_group_factorexperimental"], 
                             mod3c$coefficients["ra_group_factorexperimental"],
                             mod1$coefficients["ra_group_factorsection 8"], 
                             mod2$coefficients["ra_group_factorsection 8"], 
                             mod3$coefficients["ra_group_factorsection 8"],
                             mod1c$coefficients["ra_group_factorsection 8"],
                             mod2c$coefficients["ra_group_factorsection 8"],
                             mod3c$coefficients["ra_group_factorsection 8"]),
                     "se"=c(mod1$std.error["ra_group_factorexperimental"], 
                            mod2$std.error["ra_group_factorexperimental"], 
                            mod3$std.error["ra_group_factorexperimental"],
                            mod1c$std.error["ra_group_factorexperimental"], 
                            mod2c$std.error["ra_group_factorexperimental"], 
                            mod3c$std.error["ra_group_factorexperimental"],
                            mod1$std.error["ra_group_factorsection 8"], 
                            mod2$std.error["ra_group_factorsection 8"], 
                            mod3$std.error["ra_group_factorsection 8"],
                            mod1c$std.error["ra_group_factorsection 8"],
                            mod2c$std.error["ra_group_factorsection 8"],
                            mod3c$std.error["ra_group_factorsection 8"]))
  
  tab4$group <- names[i]
  tab4$outcome <- "evervoted"
  tab4$model <- c(rep("simple", 3),
                  rep("control", 3),
                  rep("simple", 3),
                  rep("control", 3))
  
  tab <- bind_rows(tab, tab1, tab2, tab3, tab4)
}

# Figure 5 cont.: plot ####

tab <- tab[-1,]
tab$pop <- rep(c("adult", "old", "young"), 160)
tab$cond <- rep(c(rep("exp", 6), rep("s8", 6)), 40)
a <- ggplot(tab %>% 
              filter(cond=="exp") %>%
              filter(model=="simple", !(pop=="adult"&group=="boys"), se!=0) %>%
              mutate(group = recode(group, boys="Male", girls="Female",
                                    black="Black", latino="Latino", other="Other Race",
                                    baltimore="Baltimore", boston="Boston", 
                                    chicago="Chicago", la="LA", nyc="NYC")) %>%
              mutate(group = factor(group, levels=c("NYC", "LA", "Chicago",
                                                    "Boston", "Baltimore",
                                                    "Other Race", "Latino", "Black",
                                                    "Male", "Female"),
                                    ordered=T)) %>%
              mutate(outcome = recode(outcome, matched="Registered", evervoted="Ever Voted",
                                      postreg="Prop. Voted Post-reg.", voteprop="Prop. Voted"))) +
  geom_point(aes(x=est, y=group,
                 color=pop, 
                 #shape=cond
  ), alpha=.7) + 
  geom_errorbarh(aes(y=group,
                     xmin=est-(1.96*se),
                     xmax=est+(1.96*se),
                     color=pop),
                 alpha=.3) + 
  geom_vline(xintercept=0, linetype=2, color="gray40") +
  theme_bw() + xlab("Estimated Treatment Effect") + ylab("Subgroup") + 
  facet_wrap(~outcome, nrow=2, scales = "free") +
  scale_color_manual(name="Sample", labels=c("Adult", "Baseline Age 13-19", "Baseline Age 0-12"),
                     values=c("black", "gray40", "gray80")) + 
  theme(text=element_text(size=15)) + theme(legend.position="bottom", axis.title.y=element_blank())

pdf(file="Figures/figure5.pdf", width=8, height=5)
a
dev.off()

# Appendix A6: heterog effects section 8 ####
b <- ggplot(tab %>% 
              filter(cond=="s8") %>%
              filter(model=="simple", !(pop=="adult"&group=="boys"), se!=0) %>%
              mutate(group = recode(group, boys="Male", girls="Female",
                                    black="Black", latino="Latino", other="Other Race",
                                    baltimore="Baltimore", boston="Boston", 
                                    chicago="Chicago", la="LA", nyc="NYC")) %>%
              mutate(group = factor(group, levels=c("NYC", "LA", "Chicago",
                                                    "Boston", "Baltimore",
                                                    "Other Race", "Latino", "Black",
                                                    "Male", "Female"),
                                    ordered=T)) %>%
              mutate(outcome = recode(outcome, matched="Registered", evervoted="Ever Voted",
                                      postreg="Prop. Voted Post-reg.", voteprop="Prop. Voted"))) +
  geom_point(aes(x=est, y=group,
                 color=pop, 
                 #shape=cond
  ), alpha=.7) + 
  geom_errorbarh(aes(y=group,
                     xmin=est-(1.96*se),
                     xmax=est+(1.96*se),
                     color=pop),
                 alpha=.3) + 
  geom_vline(xintercept=0, linetype=2, color="gray40") +
  theme_bw() + xlab("Estimated Treatment Effect") + ylab("Subgroup") + 
  facet_wrap(~outcome, nrow=2, scales = "free") +
  scale_color_manual(name="Sample", labels=c("Adult", "Baseline Age 13-19", "Baseline Age 0-12"),
                     values=c("blue", "forestgreen", "red")) + 
  theme(text=element_text(size=15)) + theme(legend.position="bottom")
pdf(file="Figures/suppfigure_A6.pdf", width=8, height=5)
b
dev.off()

#
# Appendix A3: IV analyses ####

## IV 
mod1 <- iv_robust(matched~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod3 <- iv_robust(matched~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod5 <- iv_robust(matched~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
summary(mod1)
summary(mod3)
summary(mod5)

huxreg("Adults"=mod1, "Age 13-19 at baseline"=mod3, "Age 0-12 at baseline"=mod5, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Neighborhood Poverty" = "local_poverty_posttreat",
                 "Site: Boston"="factor(ra_site)2",
                 "Site: Chicago"="factor(ra_site)3",
                 "Site: Los Angeles"="factor(ra_site)4",
                 "Site: New York City"="factor(ra_site)5")) %>%
  insert_row(c("Instrument: Treatment Assignment", "", "", "", "", "", ""), after=1) %>%
  insert_row(c("Omitted site: Baltimore", "", "", "", "", "", ""), after=3) %>%
  set_caption("Voter registration/match rate by MTO treatment") %>%
  print_latex()

mod1 <- iv_robust(r_postturnout~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod3 <- iv_robust(r_postturnout~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod5 <- iv_robust(r_postturnout~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
summary(mod1)
summary(mod3)
summary(mod5)

huxreg("Adults"=mod1, "Age 13-19 at baseline"=mod3, "Age 0-12 at baseline"=mod5, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Neighborhood Poverty" = "local_poverty_posttreat",
                 "Site: Boston"="factor(ra_site)2",
                 "Site: Chicago"="factor(ra_site)3",
                 "Site: Los Angeles"="factor(ra_site)4",
                 "Site: New York City"="factor(ra_site)5")) %>%
  insert_row(c("Instrument: Treatment Assignment", "", "", "", "", "", ""), after=1) %>%
  insert_row(c("Omitted site: Baltimore", "", "", "", "", "", ""), after=3) %>%
  set_caption("Post-treatment turnout rate by MTO treatment") %>%
  print_latex()

mod1 <- iv_robust(evervoted_post~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod3 <- iv_robust(evervoted_post~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod5 <- iv_robust(evervoted_post~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)
summary(mod1)
summary(mod3)
summary(mod5)

huxreg("Adults"=mod1, "Age 13-19 at baseline"=mod3, "Age 0-12 at baseline"=mod5, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Neighborhood Poverty" = "local_poverty_posttreat",
                 "Site: Boston"="factor(ra_site)2",
                 "Site: Chicago"="factor(ra_site)3",
                 "Site: Los Angeles"="factor(ra_site)4",
                 "Site: New York City"="factor(ra_site)5")) %>%
  insert_row(c("Instrument: Treatment Assignment", "", "", "", "", "", ""), after=1) %>%
  insert_row(c("Omitted site: Baltimore", "", "", "", "", "", ""), after=3) %>%
  set_caption("Voting at least once post-treatment by MTO treatment") %>%
  print_latex()


mod1 <- iv_robust(r_postregturnout~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="adult",], clusters = mto_pseudo_famid)
mod3 <- iv_robust(r_postregturnout~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid)
mod5 <- iv_robust(r_postregturnout~local_poverty_posttreat+factor(ra_site) | ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid)

summary(mod1)
summary(mod3)
summary(mod5)

huxreg("Adults"=mod1, "Age 13-19 at baseline"=mod3, "Age 0-12 at baseline"=mod5, 
       statistics=c("N"="nobs", "R-squared"="r.squared"), 
       error_pos = "right",  
       note="Robust standard errors clustered by family.",
       coefs = c("Neighborhood Poverty" = "local_poverty_posttreat",
                 "Site: Boston"="factor(ra_site)2",
                 "Site: Chicago"="factor(ra_site)3",
                 "Site: Los Angeles"="factor(ra_site)4",
                 "Site: New York City"="factor(ra_site)5")) %>%
  insert_row(c("Instrument: Treatment Assignment", "", "", "", "", "", ""), after=1) %>%
  insert_row(c("Omitted site: Baltimore", "", "", "", "", "", ""), after=3) %>%
  set_caption("Turnout rate by MTO treatment, registered voters only") %>%
  print_latex()


#
## Appendix A2: match comparison ####


View(dat %>% 
       filter(age_group=="adult") %>%
       group_by(ad_educ) %>%
       filter(x_f_ad_nevmarr==1) %>%
       summarize(weighted.mean(matched, w=f_wt_totcore98),
                 weighted.mean(evervoted_post, w=f_wt_totcore98),
                 weighted.mean(r_postturnout, w=f_wt_totcore98),
                 weighted.mean(r_postregturnout, w=f_wt_totcore98, na.rm=T),
                 n=n()))

View(dat %>% 
       filter(age_group=="adult") %>%
       group_by(ad_educ) %>%
       filter(a22 %in% c(0,1)) %>%
       summarize(weighted.mean(matched, w=f_wt_totcore98),
                 weighted.mean(evervoted_post, w=f_wt_totcore98),
                 weighted.mean(r_postturnout, w=f_wt_totcore98),
                 weighted.mean(r_postregturnout, w=f_wt_totcore98, na.rm=T),
                 n=n()))

View(dat %>% 
       filter(f_svy_yob_imp_ytgc<1990) %>%
       group_by(yt_educ) %>%
       summarize(weighted.mean(matched, w=f_wt_totcore98),
                 weighted.mean(evervoted_post, w=f_wt_totcore98),
                 weighted.mean(r_postturnout, w=f_wt_totcore98),
                 weighted.mean(r_postregturnout, w=f_wt_totcore98, na.rm=T),
                 n=n()))

View(dat %>% 
       filter(age_group=="young_kid") %>%
       group_by(x_f_c2_read) %>%
       summarize(weighted.mean(matched, w=f_wt_totcore98),
                 weighted.mean(evervoted_post, w=f_wt_totcore98),
                 weighted.mean(r_postturnout, w=f_wt_totcore98),
                 weighted.mean(r_postregturnout, w=f_wt_totcore98, na.rm=T),
                 n=n()))

View(dat %>% 
       group_by(age_group) %>%
       summarize(weighted.mean(matched, w=f_wt_totcore98),
                 n=n()))

dat <- dat %>%
  mutate(pov_tile = ntile(local_poverty_posttreat, n=4))

View(dat %>% 
       group_by(pov_tile) %>%
       summarize(weighted.mean(matched, w=f_wt_totcore98),
                 weighted.mean(evervoted_post, w=f_wt_totcore98),
                 weighted.mean(r_postturnout, w=f_wt_totcore98),
                 weighted.mean(r_postregturnout, w=f_wt_totcore98, na.rm=T),
                 n=n()))

# Appendix A7: mediation ####

dat <- dat_full

dat <- dat %>% filter(ra_group_factor!="section 8") %>% filter(age_group=="old_kid")
med.mod <- lm(local_poverty_posttreat~ra_group_factor+factor(ra_site), 
              data=dat,
              weights=f_wt_totcore98)
out.mod <- lm(evervoted_post~local_poverty_posttreat+ra_group_factor+factor(ra_site), 
              data=dat,
              weights=f_wt_totcore98)
med.out <- mediate(med.mod, out.mod, 
                   treat = "ra_group_factor", 
                   mediator = "local_poverty_posttreat",
                   sims = 100,
                   data = dat, 
                   cluster = out.mod$model$mto_pseudo_famid)
summary(med.out)

mods <- mediations(datasets=list(d1=dat),
                   treatment="ra_group_factor",
                   mediators=c("local_poverty_posttreat"),
                   outcome="evervoted_post")


# other variables
dat <- dat %>%
  mutate(grad_hs = case_when(age_final<19~NA_real_,
                             yed3c==1|hho3==2~1,
                             yed3c==5|hho3 %in% c(1,3)~0)) %>%
  mutate(attend_coll = case_when(age_final<19~NA_real_,
                                 yed3e %in% c(1,2)|hho4==1~1,
                                 yed3e %in% c(3:99)|hho4==5~0))
dat <- dat %>%
  mutate(work = case_when(age_final<23~NA_real_,
                          yem1==1|hho5 %in% c(1,2)~1,
                          yem1>1|hho5>2~0)) %>%
  mutate(married = case_when(hho6==2~1,
                             hho6 %in% c(1,3)~0),
         kids = case_when(hho7==0~0,
                          hho6 %in% c(1,2,3)~1)) %>%
  mutate(jail = case_when(hho10a==1~1,
                          hho10a==5~0)) %>%
  mutate(tract_unemp = 1-f_c9010t_pemp_dw) %>%
  mutate(tract_welf = f_c9010t_pwelf_dw) %>%
  mutate(tract_femalehead = f_c9010t_pfsfem_dw) %>%
  mutate(tract_minority = f_c9010t_pminorty_dw)

dat$interim_moves <- NA
for(i in 1:nrow(dat)){
  famid = dat$mto_pseudo_famid[i]
  num = dat$a22[dat$mto_pseudo_famid==famid]
  #num = dat_full$a22[dat$mto_pseudo_famid==famid]
  num = num[!is.na(num)]
  if(length(num)>0){
    dat$interim_moves[i] <- num
  }
}
dat$interim_moves[dat$interim_moves==-1] <- NA

summary(lm_robust(grad_hs~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(attend_coll~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(work~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(married~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(kids~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(jail~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(tract_unemp~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(tract_welf~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(tract_femalehead~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(tract_minority~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(interim_moves~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="old_kid",], clusters = mto_pseudo_famid))


mods <- mediations(datasets=list(d1=dat[dat$age_group=="old_kid",]),
                   treatment="ra_group_factor",
                   mediators=c("kids", "grad_hs", "attend_coll", "tract_unemp", "tract_welf", "tract_femalehead", "tract_minority", "interim_moves"),
                   outcome="evervoted_post")
plot(mods)
summary(mods$evervoted_post.ra_group_factor.kids)
summary(mods$evervoted_post.ra_group_factor.grad_hs)
summary(mods$evervoted_post.ra_group_factor.attend_coll)
summary(mods$evervoted_post.ra_group_factor.tract_unemp)
summary(mods$evervoted_post.ra_group_factor.tract_welf)
summary(mods$evervoted_post.ra_group_factor.tract_femalehead)
summary(mods$evervoted_post.ra_group_factor.tract_minority)
summary(med.out)

med.mod <- lm(tract_unemp~ra_group_factor+factor(ra_site), 
              data=dat,
              weights=f_wt_totcore98)
out.mod <- lm(evervoted_post~tract_unemp+ra_group_factor+factor(ra_site), 
              data=dat,
              weights=f_wt_totcore98)
med.out <- mediate(med.mod, out.mod, 
                   treat = "ra_group_factor", 
                   mediator = "tract_unemp",
                   sims = 100,
                   data = dat, 
                   cluster = out.mod$model$mto_pseudo_famid)
summary(med.out)


dat %>%
  group_by(age_group) %>%
  summarize(across(.cols=everything(), ~sum(is.na(.))/n())) %>%
  pivot_longer(cols=c(X:yt_educ), names_to = "var", values_to="prop_missing") %>%
  filter(age_group=="young_kid") %>%
  View()

# replicate treatment effects: youth

dat$age_final <- 2008 - dat$f_svy_yob_imp_ytgc


dat <- dat %>% 
  mutate(grad_hs = case_when(yed3c==1|hho3==2~1,
                             yed3c==5|hho3 %in% c(1,3)~0)) %>%
  mutate(attend_coll = case_when(yed3e %in% c(1,2)|hho4==1~1,
                                 yed3e %in% c(3:99)|hho4==5~0)) %>%
  mutate(work = case_when(yem1==1|hho5 %in% c(1,2)~1,
                          yem1>1|hho5>2~0)) %>%
  mutate(parent = case_when(yhl1d %in% c(1,2)|hho7>0~1,
                            yhl1d==5|hho7==0~0)) %>%
  mutate(jail = case_when(hho10a==1~1,
                          hho10a==5~0)) %>%
  mutate(tract_unemp = 1-f_c9010t_pemp_dw) %>%
  mutate(tract_welf = f_c9010t_pwelf_dw) %>%
  mutate(tract_femalehead = f_c9010t_pfsfem_dw) %>%
  mutate(tract_minority = f_c9010t_pminorty_dw)

dat$interim_moves <- NA
for(i in 1:nrow(dat)){
  famid = dat$mto_pseudo_famid[i]
  num = dat_full$a22[dat$mto_pseudo_famid==famid]
  num = num[!is.na(num)]
  if(length(num)>0){
    dat$interim_moves[i] <- num
  }
}
dat$interim_moves[dat$interim_moves==-1] <- NA

summary(lm_robust(grad_hs~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(attend_coll~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(work~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid"&dat$age_final>22,], clusters = mto_pseudo_famid))
summary(lm_robust(parent~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(jail~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(tract_unemp~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(tract_welf~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(tract_femalehead~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(tract_minority~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))
summary(lm_robust(interim_moves~ra_group_factor+factor(ra_site), weights=f_wt_totcore98, data=dat[dat$age_group=="young_kid",], clusters = mto_pseudo_famid))


